import sys, os

sys.path.insert(1, os.path.join("..","..",".."))
from h2o.automl import H2OAutoML
from tests import pyunit_utils as pu

from _automl_utils import import_dataset, get_partitioned_model_names


def test_exploitation_disabled():
    """Check that no exploitation step is recorded when the ratio is zero.

    Runs a time-boxed AutoML training with ``exploitation_ratio=.0`` and
    asserts that neither of the exploitation start markers shows up in
    ``training_info``.
    """
    # Keys whose absence from `training_info` is asserted below.
    exploitation_markers = ('start_GBM_lr_annealing', 'start_XGBoost_lr_search')
    settings = dict(
        project_name="py_exploitation_ratio_disabled",
        exploitation_ratio=.0,
        max_runtime_secs=30,
        seed=1,
    )

    dataset = import_dataset()
    automl = H2OAutoML(**settings)
    automl.train(y=dataset.target, training_frame=dataset.train)

    for marker in exploitation_markers:
        assert marker not in automl.training_info


def test_exploitation_doesnt_impact_max_models():
    """Check that enabling exploitation leaves the ``max_models`` budget intact.

    Trains with ``exploitation_ratio=.1`` and ``max_models=6``, then asserts
    that the leaderboard partitions into 6 base models and 2 ``se`` models,
    and that both exploitation start markers are present in ``training_info``.
    """
    dataset = import_dataset()
    automl = H2OAutoML(
        project_name="py_exploitation_ratio_max_models",
        exploitation_ratio=.1,
        max_models=6,
        seed=1,
        verbosity='debug',
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    # Printed for troubleshooting before any assertion can fail.
    print(automl.leaderboard)
    partitioned = get_partitioned_model_names(automl.leaderboard)
    base_models = partitioned.base
    assert len(base_models) == 6
    # `se` presumably holds the stacked ensembles -- confirm in _automl_utils.
    se_models = partitioned.se
    assert len(se_models) == 2

    print(automl.training_info)
    for marker in ('start_GBM_lr_annealing', 'start_XGBoost_lr_search'):
        assert marker in automl.training_info


def test_exploitation_impacts_exploration_duration():
    """Check how the time budget splits between exploration and exploitation.

    Trains with a 60s runtime budget and ``exploitation_ratio=.5``, then
    compares timestamps read from ``training_info``: the exploration span
    (AutoML start to first exploitation step) must be positive and shorter
    than the planned duration, and the exploitation span (first exploitation
    step to the following step) must be positive and shorter than the
    exploration span. If either step marker is missing, the test only prints
    a notice instead of checking durations.
    """
    planned_duration = 60
    first_exploitation_step = 'start_GBM_lr_annealing'
    after_exploitation_step = 'start_completion_GBM_grid_1'

    dataset = import_dataset()
    automl = H2OAutoML(
        project_name="py_exploitation_ratio_max_runtime",
        # excessive ratio on purpose, due to training overheads in multinode
        exploitation_ratio=.5,
        # removing some algos for the same reason
        exclude_algos=['DeepLearning', 'XGBoost'],
        max_runtime_secs=planned_duration,
        seed=1,
        verbosity='info',
    )
    automl.train(y=dataset.target, training_frame=dataset.train)

    automl_started_at = int(automl.training_info['start_epoch'])
    # Only the GBM exploitation marker is asserted: XGBoost is excluded above,
    # so 'start_XGBoost_lr_search' is not checked here.
    assert first_exploitation_step in automl.training_info

    steps_missing = (first_exploitation_step not in automl.training_info
                     or after_exploitation_step not in automl.training_info)
    if steps_missing:
        print(automl.leaderboard)
        print("budget time was too small to start and complete exploitation")
        return

    exploitation_started_at = int(automl.training_info[first_exploitation_step])
    exploitation_ended_at = int(automl.training_info[after_exploitation_step])
    exploration_span = exploitation_started_at - automl_started_at
    exploitation_span = exploitation_ended_at - exploitation_started_at

    # can't reliably check duration ratio
    assert 0 < exploration_span < planned_duration
    print(automl.leaderboard)
    print(exploitation_span)
    print(exploration_span)
    assert 0 < exploitation_span < exploration_span


# Tests run when this file is executed.
# test_exploitation_impacts_exploration_duration is disabled
# as it needs to be rethought after 3.34 modeling plan.
pu.run_tests([test_exploitation_disabled, test_exploitation_doesnt_impact_max_models])
